##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
## R code to perform Monte Carlo simulation analyses for 
##  "Every Story Has a Beginning, Middle, and an End (But Not Always in That Order): 
##   Predicting duration dynamics in a unified framework"
##
##  Authors: Daina Chiba, Nils W. Metternich, and Michael D. Ward
##
##  Last modified: 2014-08-23
##  Contact Daina Chiba (daina.chiba@gmail.com) for any questions.
##~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# This script produces 
#
#  (1) Figure 3: Bias in Estimation (Figure3_mc_bias.pdf)
#
#  (2) Figure 4: Efficiency in Estimation (Figure4_mc_efficiency.pdf)
#

# > sessionInfo()
# R version 3.1.1 (2014-07-10)
# Platform: x86_64-apple-darwin13.1.0 (64-bit)


## Set the working directory if necessary

# setwd("your_path_to_the_folder")


# preparation -------------------------------------------------------------

rm(list=ls())

setwd("~/Documents/git/DurationEstimation/CMW_Replication")
require(SPDDD)
require(mvtnorm) # for rmvnorm()
require(grid)    # for unit()
require(ggplot2)


# Define functions to perform Monte Carlo simulations ---------------------

## function to return EV
rt1ev <- function(x.uniform, shape = 1, scale = 1){
  x.t1ev <- log(qweibull(x.uniform, shape = shape, scale = scale))
  return(x.t1ev)
}

## function to generate data
mc.dgp <- function(
  p1 = 1, p2 = 1, 
  b1 = c(1, 0), b2 = c(1, -1), g = c(1, -1),
  rho.mc = 0, n=samp.size){
  
  ## Split
  cure <- rep(0, times=n)
  cure[g[1] * x13 + g[2] * x3 + rlogis(n,location=0) > 0] <- 1
  
  ## Correlated errors
  vmat <- matrix(c(1, rho.mc, rho.mc, 1), nrow=2)
  if (rho.mc >= 0.8 | rho.mc <= -0.8) {vmat <- make.positive.definite(vmat)}
  v <- rmvnorm(n, c(0,0), vmat)
  u1 <- pnorm(v[,1]); u2 <- pnorm(v[,2])
  e1 <- rt1ev(u1, shape=p1)
  e2 <- rt1ev(u2, shape=p2)
  e3 <- rt1ev(u1, shape=p1)
  
  ## Pre-conflict Peace duration
  y1 <- exp(b1[1] * x1 + b1[2] * x3 + (1/p1) * e1)
  
  ## Conflict duration
  y2 <- exp(b2[1] * x2 + b2[2] + (1/p2) * e2)
  
  ## Post-conflict Peace duration
  y3 <- exp(b1[1] * x1 + b1[2] * x3 + (1/p1) * e3)
  
  ## Cured spells
  y1c <- y1; y1c[cure==1] <- max(y1)
  y3c <- y3; y3c[cure==1] <- max(y3)
  rc <- rep(0, times= n); rc[cure==1] <- 1
  
  p1.data <- data.frame(y1=y1c, t0=0, rc=rc, fo=1, lo=1, 
                        x1=x1, x2=x2, x3=x3, x13=x13, spellID=1:n)
  c2.data <- data.frame(y2=y2,  t0=0, rc=0,  fo=1, lo=1, 
                        x1=x1, x2=x2, x3=x3, x13=x13, spellID=1:n, prev.dur=y1)
  c2.data <- c2.data[cure==0,]
  p2.data <- data.frame(y3=y3c, t0=0, rc=rc, fo=1, lo=1, 
                        x1=x1, x2=x2, x3=x3, x13=x13, spellID=1:n, prev.dur=y2)
  p2.data <- p2.data[cure==0,]
  out <- list(p1.data = p1.data, c2.data = c2.data, p2.data = p2.data)
  return(out)
}

## function to estimate models

mc.est <- function(
  n.sim = 100, seed = 123456, rho.mc = 0,
  p1 = 1, p2 = 1, 
  b1 = c(1, 0), b2 = c(1, -1), g = c(1,-1)){
  
  b.1.1 <- b.1.3 <- b.2.2 <- b.2.3 <- b.3.1 <- b.3.3 <- b.p1 <- b.p2 <- b.p3 <- matrix(NA, nrow=n.sim, ncol=4)
  g.1 <- g.3 <- matrix(NA, nrow=n.sim, ncol=2)
  rho <- matrix(NA, nrow=n.sim, ncol=2)
  pb <- txtProgressBar()
  set.seed(seed)
  for(i in 1:n.sim){
    setTxtProgressBar(pb,i/n.sim)
    mc.data <- mc.dgp(b1=b1, b2=b2, g=g, p1=p1, p2=p2, rho.mc=rho.mc)
    res <- summary(estimate.SPDDD(y1 ~ x1 + x3, y2 ~ x2, y3 ~ x1 + x3, rc ~ x13 + x3,
                              distr1 = "weibull", distr2 = "weibull", data=mc.data, trace=F))
    ## beta for x1 in P1
    b.1.1[i,1] <- res $ Independent.Model $ PreConflict[2,1]
    b.1.1[i,2] <- res $ SUR.Model $ PreConflict[2,1]
    b.1.1[i,3] <- res $ Independent.Split.Model $ PreConflict[2,1]
    b.1.1[i,4] <- res $ SP.DDD $ PreConflict[2,1]
    ## beta for x3 in P1
    b.1.3[i,1] <- res $ Independent.Model $ PreConflict[3,1]
    b.1.3[i,2] <- res $ SUR.Model $ PreConflict[3,1]
    b.1.3[i,3] <- res $ Independent.Split.Model $ PreConflict[3,1]
    b.1.3[i,4] <- res $ SP.DDD $ PreConflict[3,1]
    ## beta for x2 in C2
    b.2.2[i,1] <- res $ Independent.Model $ Conflict[2,1]
    b.2.2[i,2] <- res $ SUR.Model $ Conflict[2,1]
    b.2.2[i,3] <- res $ Independent.Split.Model $ Conflict[2,1]
    b.2.2[i,4] <- res $ SP.DDD $ Conflict[2,1]
    ## beta for x1 in P2
    b.3.1[i,1] <- res $ Independent.Model $ PostConflict[2,1]
    b.3.1[i,2] <- res $ SUR.Model $ PostConflict[2,1]
    b.3.1[i,3] <- res $ Independent.Split.Model $ PostConflict[2,1]
    b.3.1[i,4] <- res $ SP.DDD $ PostConflict[2,1]
    ## beta for x3 in P2
    b.3.3[i,1] <- res $ Independent.Model $ PostConflict[3,1]
    b.3.3[i,2] <- res $ SUR.Model $ PostConflict[3,1]
    b.3.3[i,3] <- res $ Independent.Split.Model $ PostConflict[3,1]
    b.3.3[i,4] <- res $ SP.DDD$PostConflict[3,1]
    ## lnp1
    b.p1[i,1] <- res $ Independent.Model $ PreConflict[4,1]
    b.p1[i,2] <- res $ SUR.Model $ PreConflict[4,1]
    b.p1[i,3] <- res $ Independent.Split.Model $ PreConflict[4,1]
    b.p1[i,4] <- res $ SP.DDD $ PreConflict[4,1]
    ## lnp2
    b.p2[i,1] <- res $ Independent.Model $ Conflict[3,1]
    b.p2[i,2] <- res $ SUR.Model $ Conflict[3,1]
    b.p2[i,3] <- res $ Independent.Split.Model $ Conflict[3,1]
    b.p2[i,4] <- res $ SP.DDD $ Conflict[3,1]
    ## lnp3
    b.p3[i,1] <- res $ Independent.Model $ PostConflict[4,1]
    b.p3[i,2] <- res $ SUR.Model $ PostConflict[4,1]
    b.p3[i,3] <- res $ Independent.Split.Model $ PostConflict[4,1]
    b.p3[i,4] <- res $ SP.DDD $ PostConflict[4,1]
    ## gamma for x3
    g.1[i,1] <- res $ Independent.Split.Model $ Cure[2,1]
    g.1[i,2] <- res $ SP.DDD $ Cure[2,1]
    g.3[i,1] <- res $ Independent.Split.Model$Cure[3,1]
    g.3[i,2] <- res $ SP.DDD $ Cure[3,1]
    
    ## athrho
    rho[i,1] <- res $ SUR.Model $ ath.rho[1]
    rho[i,2] <- res $ SP.DDD $ ath.rho[1]
  }
  out <- list(b.1.1 = b.1.1, b.1.3 = b.1.3, b.2.2 = b.2.2, b.3.1 = b.3.1, b.3.3 = b.3.3,
              g.1 = g.1, g.3 = g.3, b.p1 = b.p1, b.p2 = b.p2, b.p3 = b.p3, rho=rho)
  return(out)
}

get.rmse <- function(vec, true){
  sqrt(mean((vec-true)^2))
}


# Main Monte Carlo routine ------------------------------------------------

## Generate Xs
samp.size <- 1000
set.seed(12345678)
vmat <- diag(5)
xmat <- rmvnorm(samp.size, rep(0,5), diag(5))
x1 <- pnorm(xmat[,1]) * 6 - 3
x2 <- pnorm(xmat[,2]) * 6 - 3
x3 <- pnorm(xmat[,3]) * 6 - 3
x13 <- pnorm(xmat[,4]) * 6 - 3

## 200 simulations for each value of rho. 
## Each line of code below takes about 90 minutes to run on my machine (29 hours in total).
mc.res.n9 <- mc.est(n.sim=200, rho.mc = -0.9); save(mc.res.n9, file="mcres.N9.rda")
mc.res.n8 <- mc.est(n.sim=200, rho.mc = -0.8); save(mc.res.n8, file="mcres.N8.rda")
mc.res.n7 <- mc.est(n.sim=200, rho.mc = -0.7); save(mc.res.n7, file="mcres.N7.rda")
mc.res.n6 <- mc.est(n.sim=200, rho.mc = -0.6); save(mc.res.n6, file="mcres.N6.rda")
mc.res.n5 <- mc.est(n.sim=200, rho.mc = -0.5); save(mc.res.n5, file="mcres.N5.rda")
mc.res.n4 <- mc.est(n.sim=200, rho.mc = -0.4); save(mc.res.n4, file="mcres.N4.rda")
mc.res.n3 <- mc.est(n.sim=200, rho.mc = -0.3); save(mc.res.n3, file="mcres.N3.rda")
mc.res.n2 <- mc.est(n.sim=200, rho.mc = -0.2); save(mc.res.n2, file="mcres.N2.rda")
mc.res.n1 <- mc.est(n.sim=200, rho.mc = -0.1); save(mc.res.n1, file="mcres.N1.rda")
mc.res0 <- mc.est(n.sim=200, rho.mc = 0.0); save(mc.res0, file="mcres0.rda")
mc.res.p1 <- mc.est(n.sim=200, rho.mc = 0.1); save(mc.res.p1, file="mcres.P1.rda")
mc.res.p2 <- mc.est(n.sim=200, rho.mc = 0.2); save(mc.res.p2, file="mcres.P2.rda")
mc.res.p3 <- mc.est(n.sim=200, rho.mc = 0.3); save(mc.res.p3, file="mcres.P3.rda")
mc.res.p4 <- mc.est(n.sim=200, rho.mc = 0.4); save(mc.res.p4, file="mcres.P4.rda")
mc.res.p5 <- mc.est(n.sim=200, rho.mc = 0.5); save(mc.res.p5, file="mcres.P5.rda")
mc.res.p6 <- mc.est(n.sim=200, rho.mc = 0.6); save(mc.res.p6, file="mcres.P6.rda")
mc.res.p7 <- mc.est(n.sim=200, rho.mc = 0.7); save(mc.res.p7, file="mcres.P7.rda")
mc.res.p8 <- mc.est(n.sim=200, rho.mc = 0.8); save(mc.res.p8, file="mcres.P8.rda")
mc.res.p9 <- mc.est(n.sim=200, rho.mc = 0.9); save(mc.res.p9, file="mcres.P9.rda")

## If you don't want to run, you can load the estimates from files.

# load("mcres0.rda")
# load("mcres.P1.rda")
# load("mcres.P2.rda")
# load("mcres.P3.rda")
# load("mcres.P4.rda")
# load("mcres.P5.rda")
# load("mcres.P6.rda")
# load("mcres.P7.rda")
# load("mcres.P8.rda")
# load("mcres.P9.rda")
# load("mcres.N1.rda")
# load("mcres.N2.rda")
# load("mcres.N3.rda")
# load("mcres.N4.rda")
# load("mcres.N5.rda")
# load("mcres.N6.rda")
# load("mcres.N7.rda")
# load("mcres.N8.rda")
# load("mcres.N9.rda")



# Illustrate bias: split vs. no-split models ------------------------------

## beta for X3
plot.mat.m1 <- data.frame(est = mc.res0$b.1.3[,1], id = "Without Split-Population")
plot.mat.m2 <- data.frame(est = mc.res0$b.1.3[,2], id = "Without Split-Population")
plot.mat.m3 <- data.frame(est = mc.res0$b.1.3[,3], id = "With Split-Population")
plot.mat.m4 <- data.frame(est = mc.res0$b.1.3[,4], id = "With Split-Population")
plot.mat <- rbind(plot.mat.m1, plot.mat.m2, plot.mat.m3, plot.mat.m4)

cbgFillPalette <- scale_fill_manual(values=c("gray80","black"))

g <- ggplot(plot.mat, aes(est, fill = id)) + geom_density(alpha = 0.8) + cbgFillPalette + theme_bw()
g <- g + theme(legend.position = c(.2,.85), 
               legend.background = element_rect(fill="white",colour="black"),
               legend.title = element_text(size=0), 
               legend.text = element_text(size = 10),
               legend.key.width = unit(1,"cm"), 
               axis.ticks = element_blank(), axis.line=element_line(colour=NA), 
               axis.title.y = element_text(size=12,angle=90), axis.title.x = element_text(size=12,angle=0),
               axis.text.y = element_text(size=11), axis.text.x = element_text(size=11))
g <- g + scale_x_continuous("Estimated coefficient for X3 (true value = 0)", limit=c(-2,0.5),expand=c(0,0))
print(g)
ggsave(file="Figure3_mc_bias.pdf",width=8,height=5)


# Illustrate (in)efficiency: SP vs SPDDD ----------------------------------

list.help <- list(mc.res.n9,mc.res.n8,mc.res.n7,mc.res.n6,mc.res.n5,mc.res.n4,mc.res.n3,
                  mc.res.n2,mc.res.n1,mc.res0,mc.res.p1,mc.res.p2,mc.res.p3,mc.res.p4,mc.res.p5,
                  mc.res.p6,mc.res.p7,mc.res.p8,mc.res.p9)

## b.1.3
(beta.id <- which(names(mc.res.n1)=="b.1.3"))
true.beta <- 0

table.mean <- table.rmse <- matrix(NA,4,length(list.help))

for(j in 1:4){
  for(i in 1:length(list.help)){
    pull.out <- list.help[[i]][[beta.id]][,j]
    table.mean[j,i] <- mean(pull.out) 
    table.rmse[j,i] <- get.rmse(pull.out, true.beta) 
  }
}

table.mean
table.rmse

## Ratio of RMSE

pdf(file="Figure4_mc_efficiency.pdf",width=6, height=4)
par(mar=c(2.5,2.5,0,2)+0.1,cex.lab=0.8,cex.axis=0.8,
    mgp=c(1.5,0.5,0), cex.main=0.5, cex=1,bg="white")
plot(1,1,ylim=c(1,2.5),xlim=c(1,19),ann=FALSE,axes=FALSE,type="n")
lines(1:19,table.rmse[3,]/table.rmse[4,],type="o",lty=1,pch=20,lwd=1)
axis(1,at=seq(1,19),seq(-0.9,0.9,by=.1))
axis(2,las= HORIZONTAL<-1) 
title(main="",xlab="Dependence between Durations",ylab="RMSE Ratio")
dev.off()

# End of file -------------------------------------------------------------
